library(maps)
library(ggmap)
## Loading required package: ggplot2
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks maps::map()
library(nycflights13)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggmap':
##
## wind
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
qplot Function
The function qplot() in the package ggplot2
is very similar to the basic plot() function from the R
base package. It can be used to easily create and combine different
types of plots, and it is great for producing plots quickly.
However, it remains less flexible than the function
ggplot(). I highly recommend using ggplot(), as
it makes it easier to create complex graphics.
This lab provides a brief introduction to qplot(), which
stands for quick plot.
tips <- read.csv("tips.csv")
summary(tips)
## obs totbill tip sex
## Min. : 1.00 Min. : 3.07 Min. : 1.000 Length:244
## 1st Qu.: 61.75 1st Qu.:13.35 1st Qu.: 2.000 Class :character
## Median :122.50 Median :17.80 Median : 2.900 Mode :character
## Mean :122.50 Mean :19.79 Mean : 2.998
## 3rd Qu.:183.25 3rd Qu.:24.13 3rd Qu.: 3.562
## Max. :244.00 Max. :50.81 Max. :10.000
## smoker day time size
## Length:244 Length:244 Length:244 Min. :1.00
## Class :character Class :character Class :character 1st Qu.:2.00
## Mode :character Mode :character Mode :character Median :2.00
## Mean :2.57
## 3rd Qu.:3.00
## Max. :6.00
Basic plot in R
plot(tips$totbill, tips$tip)
Example 1: Histogram
ggplot(tips, aes(x = tip)) +
geom_histogram(binwidth = 0.2)
qplot() is a shortcut of ggplot():
qplot(x, y, data=, geom=, color=variable, fill=, shape=, size=, alpha=, facets=, xlab=, ylab=, main=,)
OR
qplot(variables, geometry, color, size, shape, facet, dataset)
qplot(x = tip, data = tips, geom = "histogram", binwidth = 0.2)
Add blue/green color to the histogram
qplot(x = tip, data = tips, geom = "histogram", fill = "green", binwidth = 0.2)
Apparently, the color is not filled in green. This is because
aesthetic parameters in qplot() always try to map the
aesthetic to a variable. We need to use I(value) to
indicate a specific value.
qplot(x = tip, data = tips, geom = "histogram", fill = I("green"), binwidth = 0.2)
Set color according to the variable sex
qplot(x = tip, data = tips, geom = "histogram", fill = sex, binwidth = 0.2)
Example 2 Scatter plot
qplot(totbill, tip, data = tips, geom = "point", col = sex)
Note: qplot() gives a scatter
plot by default.
qplot(totbill, tip, data = tips, col = sex)
Add more geom arguments by a vector of multiple geom names in turn:
qplot(x = totbill, y = tip, data = tips, geom = c("point", "smooth"), col = sex)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Add labels:
qplot(x = totbill, y = tip, data = tips, geom = c("point", "smooth"), col = sex, xlab = "Total Bill", ylab = "Tip", main = "Plot of Total Bill vs. Tip")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Example 3: Side-by-side box plot sex vs. tip
qplot(x = sex, y = tip, data = tips, geom = "boxplot", col = time, facets = ~smoker)
Example 4: For the mtcars data
set, provide a qplot to see the relation between
wt and mpg, set the color of points to
cyl, give different lm smooth lines according
to cyl.
# `method = "lm"` is handed to qplot() together with both geoms. Per the warning
# recorded below it is ignored as an unknown parameter (presumably by the point
# layer), while the smooth message no longer reports a default method.
qplot(x = wt, y = mpg, data = mtcars, geom = c("point", "smooth"), method = "lm", col = factor(cyl))
## Warning: Ignoring unknown parameters: method
## `geom_smooth()` using formula 'y ~ x'
plotly Package
plotly is an R package for creating interactive
web-based graphs.
There are two main ways to initiate a plotly object in R. The
plot_ly() function transforms data into a plotly object,
while the ggplotly() function transforms a ggplot object
into a plotly object. Regardless of how a plotly object is created,
printing it results in an interactive web-based visualization with
tooltips, zooming, and panning enabled by default.
library(plotly)
There are two ways to make plotly graphs.
1. ggplotly()
2. plot_ly()
ggplotly() Function
Recall from our qplot example.
# Same qplot() call as in the earlier example with the axes swapped; `method` is
# again reported as an ignored unknown parameter in the warning recorded below.
qplot(data = mtcars, x = mpg, y = wt, col = factor(cyl), geom = c("point", "smooth"), method = "lm")
## Warning: Ignoring unknown parameters: method
## `geom_smooth()` using formula 'y ~ x'
To create a ggplotly() graph, create the
ggplot() first.
plot <- ggplot(mtcars, aes(x = mpg, y = wt, col = factor(cyl))) +
geom_point() +
geom_smooth(method = "lm")
ggplotly(plot)
## `geom_smooth()` using formula 'y ~ x'
library(gapminder)
glimpse(gapminder)
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
ggplotly() will animate time (frame refers to
animation)
p <- ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, color = continent, size = pop, frame = year, text = country, id = country)) +
geom_point() +
scale_x_log10()
ggplotly(p)
We can also change the animation part in the ggplotly()
function.
transition: the duration of the smooth transition between frame (in milliseconds)
easing: the type of transition easing (linear makes the movement smooth, elastic will make the points “bounce” around)
mode: describes how a new animate call interacts with currently-running animations.
ggplotly(p) %>%
animation_opts(transition = 500, easing = "linear", mode = "immediate")
We can also change the place of the Play button and other details on the graph.
ggplotly(p) %>%
animation_opts(1000, easing = "elastic", redraw = FALSE) %>%
animation_button(x = 1, xanchor = "right", y = 0, yanchor = "bottom") %>%
animation_slider(currentvalue = list(prefix = "YEAR ", font = list(color="red")))
plot_ly() Function
The plot_ly() function provides a direct interface to
plotly.js, so anything in the figure reference can be
specified via plot_ly().
A plotly visualization is composed of one (or more) trace(s), and
every trace has a type. The default trace type,
“scatter”, can be used to draw a large number of geometries along with
the add_XX() functions.
The plot_ly() function has a number of arguments that
make it easier to scale data values to visual aesthetics
(e.g., color/colors, symbol/symbols,
linetype/linetypes, size/sizes).
The syntax of plot_ly() is similar to that of
qplot():
plot_ly(data, x, y, symbol, size, type, mode, color)
type is type of graph/depiction
Add ~ to map the variable
plot_ly(data = tips, x = ~tip, type = "histogram")
Or we can use
plot_ly(tips, x = ~tip) %>%
add_histogram()
The code above will produce the same graph, but split into several
lines with add_XX()
To add time as a factor of color:
plot_ly(tips, x = ~tip, color = ~time) %>%
add_histogram()
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(data = tips, x = ~day, type = "bar")
plot_ly() will not do the statistical transformation for
us, so we need to do it ourselves.
tips %>%
count(day) %>%
plot_ly(x = ~day, y = ~n) %>%
add_bars()
Note: Bar traces require bar heights (both x and y), whereas histogram traces require just a single variable.
Example:
# Produce a correlation table from the numeric columns of diamonds.
# select(where(...)) is the current replacement for the superseded select_if().
corr <- cor(dplyr::select(diamonds, where(is.numeric)))
corr
## carat depth table price x y
## carat 1.00000000 0.02822431 0.1816175 0.9215913 0.97509423 0.95172220
## depth 0.02822431 1.00000000 -0.2957785 -0.0106474 -0.02528925 -0.02934067
## table 0.18161755 -0.29577852 1.0000000 0.1271339 0.19534428 0.18376015
## price 0.92159130 -0.01064740 0.1271339 1.0000000 0.88443516 0.86542090
## x 0.97509423 -0.02528925 0.1953443 0.8844352 1.00000000 0.97470148
## y 0.95172220 -0.02934067 0.1837601 0.8654209 0.97470148 1.00000000
## z 0.95338738 0.09492388 0.1509287 0.8612494 0.97077180 0.95200572
## z
## carat 0.95338738
## depth 0.09492388
## table 0.15092869
## price 0.86124944
## x 0.97077180
## y 0.95200572
## z 1.00000000
# Produce a correlation heat map
plot_ly() %>%
add_heatmap(x = rownames(corr), y = colnames(corr), z = corr) %>%
colorbar(limits = c(-1, 1))
plot_ly(data = tips, y = ~tip, type = "box")
# Box plot of all tips in a single box.
p1 <- add_boxplot(plot_ly(data = tips, y = ~tip))
# Box plot of tips with `time` mapped to x.
p2 <- add_boxplot(plot_ly(data = tips, y = ~tip, x = ~time))
# Combine both plots into one figure.
subplot(p1, p2)
plot_ly(tips, y = ~tip, x = ~interaction(time, day)) %>%
add_boxplot(color = ~time)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
Add more attributes to the boxplot
# Add points to the boxplot, pointpos moves where the points are and marker changes the color of the points
plot_ly(tips, x = ~tip, type = "box", boxpoints = "all", pointpos = 1, marker = list(color = "green")) %>%
layout(title = "boxplot of tips") #adds a title to the boxplot
For two quantitative variables, plot_ly() defaults to a scatterplot, but you can also be explicit about adding a layer of markers/points via the add_markers() function.
plot_ly(data = tips, x = ~totbill, y = ~tip)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
plot_ly(data = tips, x = ~totbill, y = ~tip, text = ~day) %>%
add_markers(color = ~sex)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(data = tips, x = ~totbill, y = ~tip, color = ~sex, text = ~day, mode = "markers")
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(data = tips, x = ~totbill, y = ~tip, color = ~sex, type = "scatter", text = ~day, mode = "markers+text")
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
plot_ly(tips, x = ~totbill, y = ~tip, type = "scatter", mode = "text", text = ~day)
plot_ly(tips, x = ~totbill, y = ~tip, type = "scatter", mode = "text", text = ~paste("day:", day))
Example 1: For gapminder, select year at 2002, make a plotly object to visualize the relation between log(gdpPercap) and lifeExp, set color according to continent, size according to population, text according to country, mark the largest lifeExp with the text “Longest LifeExp” in the plot.
gapminder %>%
filter(year == 2002) %>%
plot_ly(x = ~log(gdpPercap), y = ~lifeExp, color = ~continent, size = ~pop, text = ~country) %>%
add_markers() %>%
slice(which.max(lifeExp)) %>%
add_annotations(text = "Longest LifeExp")
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
## Warning: `line.width` does not currently support multiple values.
Example 2: Mark the max and min mpg
plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
slice(c(which.max(mpg), which.min(mpg))) %>%
add_annotations(text = c("Max mpg", "Min mpg"))
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
If we only want to point out the values at the maximum and minimum:
plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
slice(c(which.max(mpg), which.min(mpg))) %>%
add_annotations(text = ~mpg)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
If we want to point out the corresponding cyl value at
the maximum and minimum mpg.
plot_ly(data = mtcars, x = ~wt, y = ~mpg) %>%
slice(c(which.max(mpg), which.min(mpg))) %>%
add_annotations(text = ~factor(cyl))
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
Example 3: Mark each point with the values
of cyl.
plot_ly(data = mtcars, x = ~wt, y = ~mpg, mode = "text", text = ~factor(cyl))
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
Example 4: Mark only the points that have a cyl of 4 (the filter affects only the annotations in the plot, not the whole data set)
plot_ly(mtcars, x = ~wt, y = ~mpg, mode = "markers", color = ~factor(cyl)) %>%
add_annotations(text = ~cyl, data = filter(mtcars, cyl == 4))
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
Line graph is good for time series datasets to see trends.
Example 1: The airmiles data
set
data("airmiles")
airmiles
## Time Series:
## Start = 1937
## End = 1960
## Frequency = 1
## [1] 412 480 683 1052 1385 1418 1634 2178 3362 5948 6109 5981
## [13] 6753 8003 10566 12528 14760 16769 19819 22362 25340 25343 29269 30514
Basic scatter plot
plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "markers")
Add lines to the plot
plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "markers+lines")
plot_ly(x = ~time(airmiles), y = ~airmiles, type = "scatter", mode = "lines")
OR
plot_ly(x = ~time(airmiles), y = ~airmiles) %>%
add_lines()
Example 2: The txhousing data
set
head(txhousing)
## # A tibble: 6 × 9
## city year month sales volume median listings inventory date
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Abilene 2000 1 72 5380000 71400 701 6.3 2000
## 2 Abilene 2000 2 98 6505000 58700 746 6.6 2000.
## 3 Abilene 2000 3 130 9285000 58100 784 6.8 2000.
## 4 Abilene 2000 4 98 9730000 68600 785 6.9 2000.
## 5 Abilene 2000 5 141 10590000 67300 794 6.8 2000.
## 6 Abilene 2000 6 156 13910000 66900 780 6.6 2000.
# "lines" is the scatter mode keyword for line traces ("line" is not a valid mode).
plot_ly(txhousing, x = ~date, y = ~median, mode = "lines", color = ~city)
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plotly.com/r/reference/#scatter
## Warning: Ignoring 616 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
plot_ly(txhousing, x= ~date, y = ~median) %>%
add_lines(color = ~city) %>%
hide_legend()
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
When a data frame is associated with a plotly object, it
allows us to manipulate the data underlying that object in the same way
we would directly manipulate the data (dplyr)
We can manipulate data, express complex multi-layer plots, and chain
all operations together via %>%. It is more readable and
understandable.
txhousing %>%
group_by(city) %>%
plot_ly(x = ~date, y = ~median) %>%
add_lines(name = "Texan cities", alpha = I(0.2)) %>% #first trace: add one line per city
filter(city == "Houston") %>%
add_lines(name = "Houston", color = I("red")) # second trace
Sometimes the directed graph of a pipeline can be restrictive. (In this example, after filtering the data down to Houston, there is no way to recover the original data inside the pipeline.)
Solution 1:
txhousing %>%
group_by(city) %>%
plot_ly(x = ~date, y = ~median) %>%
# plots one line per city since p knows city is a grouping variable
add_lines(alpha = I(0.2), name = "Texan Cities") %>%
add_lines(name = "Houston", data = filter(txhousing, city == "Houston"), color = I("red")) %>%
add_lines(name = "San Antonio", data = filter(txhousing, city == "San Antonio"), color = I("black"))
Solution 2:
The add_fun() function helps to work-around this
restriction. It works by applying a function to the plotly
object, but does not affect the data associated with the
plotly object.
The add_fun() function that accepts a plot object as
input, possibly applies a transformation to the data, and maps that data
to visual objects.
#' Add a line trace for a single city.
#'
#' @param plot A plotly object whose data has a `city` column.
#' @param name Value compared against `city`; also used as the trace name.
#' @return The result of `add_lines()` applied to the filtered plot.
layer_city <- function(plot, name) {
  # Restrict the data attached to the plot to the requested city ...
  city_only <- filter(plot, city == name)
  # ... and draw that subset as its own named trace.
  add_lines(city_only, name = name)
}
txhousing %>%
group_by(city) %>%
plot_ly(x=~date, y=~median) %>%
add_lines(name="Texan cities", alpha=I(0.2)) %>% # first trace: allcities
add_fun(layer_city, "Houston") %>% # map that data to plot object
add_fun(layer_city, "San Antonio")
Extension: One more example for
add_fun()
#' Add a per-date median line and an interquartile ribbon.
#'
#' @param plot A plotly object whose data has `date` and `median` columns.
#' @return The plot with a "median" line trace and an "IQR" ribbon trace added.
layer_iqr <- function(plot) {
  # Regroup by date so the quartiles are computed per date.
  by_date <- group_by(plot, date)
  quartiles <- summarise(
    by_date,
    q1 = quantile(median, 0.25, na.rm = TRUE),
    m = median(median, na.rm = TRUE),
    q3 = quantile(median, 0.75, na.rm = TRUE)
  )
  with_median <- add_lines(quartiles, y = ~m, name = "median", color = I("black"))
  add_ribbons(with_median, ymin = ~q1, ymax = ~q3, name = "IQR", color = I("yellow"))
}
txhousing %>%
group_by(city) %>%
plot_ly(x=~date, y=~median) %>%
add_lines(name="Texan cities", alpha=I(0.2)) %>% # first trace: allcities
add_fun(layer_iqr) %>%
add_fun(layer_city, "Houston") %>%
add_fun(layer_city, "San Antonio")
Example 3: Provide a line graph to visualize the trend of unemployment (the unemploy column, i.e. the number of unemployed) over time in the economics data set, and mark the date on the point with the highest unemployment.
economics %>%
plot_ly(x = ~date, y = ~unemploy) %>%
add_lines() %>%
slice(which.max(unemploy)) %>%
add_annotations(text = ~date)
ggmap Package
The ggmap package makes it easy to retrieve raster map tiles from popular online mapping services like Stamen Maps and Google Maps, and plot them using the ggplot2 framework:
We can add the points (with longitude and latitude) to the map
Example 1: find the airports with the highest arrival delay in a map
data1 <- flights %>%
group_by(dest) %>%
summarize(average = mean(arr_delay, na.rm = TRUE)) %>%
left_join(airports, by=c("dest" = "faa"))
head(data1)
## # A tibble: 6 × 9
## dest average name lat lon alt tz dst tzone
## <chr> <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ABQ 4.38 Albuquerque International … 35.0 -107. 5355 -7 A Amer…
## 2 ACK 4.85 Nantucket Mem 41.3 -70.1 48 -5 A Amer…
## 3 ALB 14.4 Albany Intl 42.7 -73.8 285 -5 A Amer…
## 4 ANC -2.5 Ted Stevens Anchorage Intl 61.2 -150. 152 -9 A Amer…
## 5 ATL 11.3 Hartsfield Jackson Atlanta… 33.6 -84.4 1026 -5 A Amer…
## 6 AUS 6.02 Austin Bergstrom Intl 30.2 -97.7 542 -6 A Amer…
Note: Syntax:
c(lowerleftlon, lowerleftlat, upperrightlon, upperrightlat)
us <- c(left = -125, bottom = 25.75, right = -67, top = 49)
Different map types:
maptype= c("terrain", "terrain-background", "terrain-labels", "terrain-lines",
"toner", "toner-2010", "toner-2011", "toner-background", "toner-hybrid",
"toner-labels", "toner-lines", "toner-lite", "watercolor")
map <- get_stamenmap(us, zoom = 5, maptype = "toner-lite")
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
ggmap(map)
# Airports drawn on the base map, coloured and sized by average arrival delay.
# `text = name` is not a ggplot2 aesthetic (hence the warning recorded below);
# presumably it is kept so the airport name reaches the ggplotly() tooltip.
map1 <- ggmap(map) +
  geom_point(
    data = data1,
    aes(x = lon, y = lat, color = average, size = average, text = name),
    na.rm = TRUE # spelled out: `T` is an ordinary, reassignable variable
  ) +
  scale_color_gradient(low = "green", high = "darkblue")
## Warning: Ignoring unknown aesthetics: text
ggplotly(map1)
Example 2: find the most popular airport in a map
data2 <- flights %>%
drop_na() %>%
count(dest) %>%
left_join(airports, by = c("dest" = "faa"))
us <- c(left = -125, bottom = 25.75, right = -67, top = 49)
map <- get_stamenmap(us, zoom = 5, maptype = "toner-lite")
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
# Airports drawn on the base map, coloured and sized by number of flights (n).
# `text = name` is not a ggplot2 aesthetic (hence the warning recorded below);
# presumably it is kept so the airport name reaches the ggplotly() tooltip.
map2 <- ggmap(map) +
  geom_point(
    data = data2,
    aes(x = lon, y = lat, color = n, size = n, text = name),
    na.rm = TRUE # spelled out: `T` is an ordinary, reassignable variable
  ) +
  scale_color_gradient(low = "blue", high = "red")
## Warning: Ignoring unknown aesthetics: text
ggplotly(map2)
maps Package
It allows us to turn data from the maps package into a data frame suitable for plotting with ggplot.
The structure of those data frames:
Plot the USA map: using geom_polygon().
geom_polygon() draws lines between points and “closes
them up” (i.e. draws a line from the last point back to the first
point). You have to map the group aesthetic to the group column.
Example 1: Get data set with States info to get a map
states <- map_data("state")
qplot(long, lat, data=states)
qplot(long, lat, data = map_data("world2"))
Example 2: We need to use
group=group if we want to connect the map with lines
qplot(long, lat, data = states, geom = "path")
qplot(long, lat, data = states, geom = "path", group = group)
Example 3 Use
geom = "polygon"
qplot(long, lat, data = states, geom = "polygon", group = group)
We can change the border color:
ggplot(states) +
geom_polygon(aes(x = long, y = lat, group = group), color = "red")
We can also change the filled color
qplot(long, lat, data = states, geom = "polygon", group = group, fill = long, color = "red")
Example 4: coord_fixed() fixes
the relationship between one unit in the y direction and one unit in the
x direction. Here every y unit is 1.3 times longer than an x unit, which
makes the plot come out looking good.
# US states filled by region, with a fixed 1.3 y/x aspect ratio.
ggplot(data = states) +
  geom_polygon(aes(x = long, y = lat, fill = region, group = group), color = "white") +
  coord_fixed(1.3) +
  # "none" is the replacement named by the deprecation warning for
  # guides(fill = FALSE); it hides the fill legend.
  guides(fill = "none")
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.
Note: guides(fill = FALSE) turns off the fill
legend; as the warning above says, this form is deprecated in favour of guides(fill = "none").
Example 5: Create a map of the West Coast of the US.
west_coast <- subset(states, region %in% c("california", "oregon", "washington"))
ggplot(data = west_coast) +
geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black")
Apparently, the scale of this map is not correct. We can use
coord_quickmap() to let R choose the suitable ratio for
us.
ggplot(data = west_coast) +
geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") +
coord_quickmap()
Example 6: Get a state map (Use Georgia for example)
ga_df <- map_data("state") %>%
filter(region == "georgia")
ggplot(data = ga_df) +
geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black")
We can change the theme to make the map more distinctive.
ggplot(data = ga_df) +
geom_polygon(aes(x = long, y = lat, group = group), fill = "palegreen", color = "black") +
coord_quickmap() +
theme_void()
We can change the color.
ggplot(data = ga_df, mapping = aes(x = long, y = lat, group = group)) +
coord_quickmap() +
geom_polygon(color = "black", fill = "gray") +
theme_void()
Example 7: Get county map
state_df <- map_data("county") %>%
filter(region == "georgia")
ggplot(data = ga_df, mapping = aes(x = long, y = lat, group = group)) +
coord_quickmap() +
geom_polygon(color = "black", fill = "gray") +
theme_void() +
geom_polygon(data = ga_df, aes(x = long, y = lat, group = group),fill = NA, color = "white") +
geom_polygon(data = state_df, color = "black", fill = NA)
Example 8: Mark the population of each county in Georgia
# Read the county-level data and lower-case the county names so they can be
# matched against the lower-case `subregion` names of the map data.
GAdat <- read.csv("GAdat.csv")
GAdat$County <- tolower(GAdat$County)
# The join below uses the county polygons in state_df, so the "de kalb"
# spelling has to be normalised there; ga_df (the state outline) is not part
# of the join, so fixing it had no effect on the result.
state_df$subregion <- replace(state_df$subregion, state_df$subregion == "de kalb", "dekalb")
mapdat <- left_join(GAdat, state_df, by = c("County" = "subregion"))
# County polygons filled by Population with a constant half-transparent red
# outline. The former aes(color = "yellow") conflicted with that constant
# `colour` and had no visible effect, so it is removed.
ggplot(mapdat, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Population), colour = alpha("red", 1 / 2)) +
  scale_fill_gradient(low = "blue", high = "red") +
  # County borders drawn on top in black, without fill.
  geom_polygon(data = state_df, colour = "black", fill = NA) +
  theme_void() +
  coord_quickmap()
Example 9: We can also play with colors:
# Same county map with a reversed rainbow palette on a log10 fill scale.
# The former aes(color = "yellow") conflicted with the constant `colour` and
# had no visible effect, so it is removed.
ggplot(mapdat, aes(long, lat, group = group)) +
  geom_polygon(aes(fill = Population), colour = alpha("red", 1 / 2)) +
  geom_polygon(data = state_df, colour = "black", fill = NA) +
  theme_void() +
  coord_fixed(1.2) +
  scale_fill_gradientn(
    colours = rev(rainbow(7)),
    breaks = c(2, 4, 10, 100, 1000, 10000),
    trans = "log10"
  )
# World map with one fill colour per region.
world <- map_data("world")
ggplot(data = world) +
  geom_polygon(aes(x = long, y = lat, group = group, fill = region), color = "white") +
  coord_quickmap() +
  # "none" is the replacement named by the deprecation warning for
  # guides(fill = FALSE); it hides the fill legend.
  guides(fill = "none")
## Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> =
## "none")` instead.